In [1]:
from __future__ import print_function

import numpy as np
import pandas as pd
from collections import OrderedDict #sorting participant df dict before pd.concat()
import matplotlib.pylab as plt
%matplotlib inline
pd.options.display.mpl_style = 'default'

TASK_NAMES_USING = [
                  'T1_SMS_5',       'T1_SMS_8', 
                  'Ticks_ISO_T2_5', 'Ticks_ISO_T2_8',
                  'Ticks_Linear_5', 'Ticks_Linear_8',
                  'Ticks_Phase_5',  'Ticks_Phase_8',
                  'Jits_ISO_5',     'Jits_ISO_8',
                  'Jits_Phase_5',   'Jits_Phase_8',            
                  'Jits_Linear_5',  'Jits_Linear_8',
                  'ISIP_5',         'ISIP_8', 
                  'Improv_Metronome',
                  'Improv_Melody',     
                  ]

pid_exclusions = {k: [] for k in TASK_NAMES_USING}

REMOVE_TASKS = ['Ticks_Pract_8', 'Ticks_Pract_5', 
                    'Practice_Tick_8', 'Practice_Tick_5',
                    'Jits_Pract_8', 'Jits_Pract_5',
                    'Practice_jittr_8', 'Practice_jittr_5',
                    'Pattern_Practice', ]

PRACTICE_TASK_NUMBERS = [0, 1, 11, 24, 25]
PITCH_R_PAD = 48
PITCH_L_PAD = 38
#PITCH_TICK = 42

CHANNEL_TAP = 1
CHANNEL_PIANO = 2
CHANNEL_LOOPBACK = 5

In [2]:
#EXPECTED_LOOPBACK_COUNTS = {
#    'T1_SMS_5': 130,
#    'T1_SMS_8': 120,
#    'Ticks_ISO_T2_5': 130,
#    'Ticks_ISO_T2_8': 120,
#    'Ticks_Linear_5': 170,
#    'Ticks_Linear_8': 170,
#    'Ticks_Phase_5': 170,
#    'Ticks_Phase_8': 170,
#    'Jits_ISO_5': 360,
#    'Jits_ISO_8': 360,
#    'Jits_Phase_5': 510,
#    'Jits_Phase_8': 510,
#    'Jits_Linear_5': 510,
#    'Jits_Linear_8': 510,
#    'ISIP_5': 40,
#    'ISIP_8': 30,
#    'Improv_Metronome': 140,
#    'Improv_Melody': 162,
#    }

In [3]:
def participant_df(csvfilename):
    '''
    clean up some of the raw CSVs' idiosyncrasies
    and load into a one-participant DataFrame.
    '''
    from StringIO import StringIO  # Python 2; on Python 3 use io.StringIO instead
    csv_lines = open(csvfilename).readlines()
    csv_edited = ('csv_line,stamp_type,' +
                  'run_count,task_id,task_name,i,channel,pitch,velocity,micros,blank' + 
                  '\n')
    for idx, line in enumerate(csv_lines):
        # Lines not to parse: per-task headings, end markers, and non-timestamped
        # targets (an early version of the admin software stamped silent, unused
        # ISIP targets with micros=0)
        if any(s in line for s in ['[', 'taskRunCount', 'end,end,end', 'IntervalOut,X,X,0']):
            continue
        line_number = str(idx)
        if ('IntervalOut' in line):
            stamp_type = 'target' 
            line = line.replace('X,', ',')    
            line = line.replace('IntervalOut,', ',')
        else:
            stamp_type = 'midi' #this doesn't distinguish between performance (ch1) and loopback (ch5)
                                #...let alone left/right tap!
        csv_edited += (line_number + ',' + 
                       stamp_type + ',' +
                       line)
    df = pd.read_csv(StringIO(csv_edited), 
                 index_col = ['task_name','csv_line'], #was taskname, stamp_type, csvline
                 header = 0,  
                 )
    df = df.drop('blank', axis=1)
    return df

In [4]:
#list files in subdir that end in .csv
import os

path = './csv_raw_data'
allfiles = os.listdir(path)
csvfiles = [os.path.join(path, f) 
            for f in list(allfiles) 
            if os.path.splitext(f)[1]==".csv"]

# Something fails at this point when running on Wakari (so maybe on any Linux
# platform?): the file names are correct, but participant_df(f) returns
# empty DataFrames.
sub_dfs = {os.path.basename(f)[3:6]: participant_df(f) 
           for f in csvfiles}
sub_dfs_sorted = OrderedDict(sorted(sub_dfs.items(), key=lambda t: t[0]))

dbase = pd.concat(sub_dfs_sorted.values(), 
                  #axis=0, #defaults
                  #join='outer', 
                  #join_axes=None, 
                  #ignore_index=False, 
                  keys=sub_dfs_sorted.keys(),  # participant id strings
                  #levels=None, 
                  names=['pid'], 
                  #verify_integrity=False
                  )
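
# Quick sanity check on the concat (a minimal assertion, sketch only):
# the keys should add a 'pid' level above each participant frame's own index.
assert list(dbase.index.names) == ['pid', 'task_name', 'csv_line']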

In [5]:
dbase = dbase.drop(REMOVE_TASKS, axis=0, level='task_name')
    
pid_list = sorted(list(dbase.index.get_level_values('pid').unique()))
print(pid_list)
task_name_list = sorted(list(dbase.index.get_level_values('task_name').unique()))
print(task_name_list)


['015', '016', '017', '018', '019', '020', '021', '022', '023', '024', '025', '026', '027', '028', '029', '030', '031', '032', '033', '034', '035', '036', '037', '038', '039', '040', '041', '042', '043', '044', '045', '046', '047', '048', '049', '050', '051', '052', '053', '054', '055', '056', '057', '058', '059', '060', '061', '062', '063', '064', '065', '066', '067', '068', '069', '070', '071', '072', '073', '074', '075', '076', '077', '078', '079', '080', '081', '082', '083', '084', '085', '086', '087', '088', '089', '090', '091', '092', '093', '094', '095', '096', '097', '098', '099', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121']
['ISIP_5', 'ISIP_8', 'Improv_Melody', 'Improv_Metronome', 'Jits_ISO_5', 'Jits_ISO_8', 'Jits_Linear_5', 'Jits_Linear_8', 'Jits_Phase_5', 'Jits_Phase_8', 'Pattern_Record', 'T1_SMS_5', 'T1_SMS_8', 'Ticks_ISO_T2_5', 'Ticks_ISO_T2_8', 'Ticks_Linear_5', 'Ticks_Linear_8', 'Ticks_Phase_5', 'Ticks_Phase_8']

In [6]:
# promote task_name above pid to top of index hierarchy
dbase = dbase.swaplevel(0, 1, axis=0) 
dbase = dbase.sort_index()
#This puts the tasks in alphabetical order, but we'll need to recover the
#original task presentation order eventually. run_count isn't usable directly
#(it resets to 1 if the machine was restarted), but min(csv_line) for each
#task never resets, so we can compare on that (see the sketch after this cell).

dbase.loc[(dbase.channel==CHANNEL_TAP) & (dbase.pitch==PITCH_R_PAD), 'stamp_type'] = 'tap_r'
dbase.loc[(dbase.channel==CHANNEL_TAP) & (dbase.pitch==PITCH_L_PAD), 'stamp_type'] = 'tap_l'
dbase.loc[(dbase.channel==CHANNEL_TAP) 
          & (dbase.pitch != PITCH_L_PAD) 
          & (dbase.pitch != PITCH_R_PAD), 'stamp_type'] = 'tap_trigger_error'
dbase.loc[dbase.channel==CHANNEL_PIANO, 'stamp_type'] = 'piano'
dbase.loc[dbase.channel==CHANNEL_LOOPBACK, 'stamp_type'] = 'loopback'

assert set(dbase.stamp_type.values) == set(['loopback', 'piano', 'tap_r', 
                                            'tap_l', 'tap_trigger_error', 
                                            'target'])
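
A sketch of recovering the original task presentation order from min(csv_line), per the note above (hypothetical names; assumes csv_line increases monotonically through each participant's session file):

first_line = dbase.reset_index().groupby(['pid', 'task_name']).csv_line.min()
presentation_order = first_line.groupby(level='pid').rank().astype(int)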

In [7]:
# Duplicate task runs were removed manually from the CSV files -- we ran out
# of memory trying to do this in pandas (commented out below).

#grouped = dbase.groupby(level=['task_name', 'pid'])
#
#def select_rerun_tasks(df):
#    trc_min = df.run_count.min()
#    trc_max = df.run_count.max()    
#    df['trc_min'] = trc_min
#    df['trc_max'] = trc_max    
#    subset = df.loc[df.trc_min != df.trc_max]
#    return subset
#
#db_duplicate_issues = grouped.apply(select_rerun_tasks)
#
#dbdi = db_duplicate_issues.loc[(db_duplicate_issues.stamp_type=='loopback')
#                                & (db_duplicate_issues.task_id != PRACTICE_TASK_NUMBERS[0])
#                                & (db_duplicate_issues.task_id != PRACTICE_TASK_NUMBERS[1])
#                                & (db_duplicate_issues.task_id != PRACTICE_TASK_NUMBERS[2])
#                                & (db_duplicate_issues.task_id != PRACTICE_TASK_NUMBERS[3])
#                                & (db_duplicate_issues.task_id != PRACTICE_TASK_NUMBERS[4])
#                                ]
#
#dbdi.to_csv('duplicate_issues_v3.csv')
#currently: remaining duplicate issue is pid 013, improv_metronome. Couldn't
#           resolve this based on my notes-- listen to the audio recording.
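
For reference, a lower-memory variant of the commented-out rerun detection, using a groupby transform to build a boolean mask instead of apply() materializing a copy of each group (a sketch only, not run here):

grouped = dbase.groupby(level=['task_name', 'pid'])
rerun_mask = grouped.run_count.transform(lambda s: s.min() != s.max()).astype(bool)
db_duplicate_issues = dbase[rerun_mask]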

In [9]:
# Convert timestamps to milliseconds relative to start of task
grouped = dbase.groupby(level=['task_name', 'pid'])
def compute_within_task_millis(df):
    df_loopback = df.loc[df.stamp_type=='loopback']
    start_micros = df_loopback.micros.min()
    df['task_ms'] = (df.micros - start_micros)/1000.
    return df
dbase = grouped.apply(compute_within_task_millis)

Computing intervals within each task + pid + stamp_type group (mainly useful for the ISIP tasks)


In [10]:
dbase = dbase.set_index('stamp_type', append=True)
dbase = dbase.swaplevel(2, 3) # new hierarchy: task_name, pid, stamp_type, csv_line
dbase = dbase.sort_index() #make groups of stamp_type within task/pid
dbase.head()


Out[10]:
                                      run_count  task_id  i  channel  pitch  velocity      micros   task_ms
task_name  pid  stamp_type  csv_line
ISIP_5     015  loopback    8777             12        5  0        5     42       127  1338601736     0.000
                            8778             12        5  1        5     42       127  1339101656   499.920
                            8779             12        5  2        5     42       127  1339602176  1000.440
                            8780             12        5  3        5     42       127  1340101948  1500.212
                            8782             12        5  5        5     42       127  1340601716  1999.980

5 rows × 8 columns


In [11]:
grouped = dbase.groupby(level=['task_name', 'pid', 'stamp_type'])

def compute_intervals_unfiltered(df):    
    df['int_raw'] = df.task_ms - df.task_ms.shift(1)
    return df
dbase = grouped.apply(compute_intervals_unfiltered)

dbase.head()


Out[11]:
                                      run_count  task_id  i  channel  pitch  velocity      micros   task_ms  int_raw
task_name  pid  stamp_type  csv_line
ISIP_5     015  loopback    8777             12        5  0        5     42       127  1338601736     0.000      NaN
                            8778             12        5  1        5     42       127  1339101656   499.920  499.920
                            8779             12        5  2        5     42       127  1339602176  1000.440  500.520
                            8780             12        5  3        5     42       127  1340101948  1500.212  499.772
                            8782             12        5  5        5     42       127  1340601716  1999.980  499.768

5 rows × 9 columns

Pickle dbase


In [12]:
pickle_dbase = "c:/db_pickles/pickle - dbase - 2014-10-03b.pickle"
dbase.to_pickle(pickle_dbase)

dbase[:10]


Out[12]:
                                      run_count  task_id   i  channel  pitch  velocity      micros   task_ms  int_raw
task_name  pid  stamp_type  csv_line
ISIP_5     015  loopback    8777             12        5   0        5     42       127  1338601736     0.000      NaN
                            8778             12        5   1        5     42       127  1339101656   499.920  499.920
                            8779             12        5   2        5     42       127  1339602176  1000.440  500.520
                            8780             12        5   3        5     42       127  1340101948  1500.212  499.772
                            8782             12        5   5        5     42       127  1340601716  1999.980  499.768
                            8784             12        5   7        5     42       127  1341101468  2499.732  499.752
                            8786             12        5   9        5     42       127  1341602040  3000.304  500.572
                            8788             12        5  11        5     42       127  1342101840  3500.104  499.800
                            8790             12        5  13        5     42       127  1342601588  3999.852  499.748
                            8792             12        5  15        5     42       127  1343102384  4500.648  500.796

10 rows × 9 columns
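
(Pt. 2 resumes from this pickle; reading it back in is a one-liner, assuming the same path:)

dbase = pd.read_pickle(pickle_dbase)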


In [17]:
test = dbase.xs('T1_SMS_8', level='task_name').xs('loopback', level='stamp_type')
test.groupby(level='pid').count()


Out[17]:
     run_count  task_id    i  channel  pitch  velocity  micros  task_ms  int_raw
pid
010        120      120  120      120    120       120     120      120      119
011        120      120  120      120    120       120     120      120      119
012        120      120  120      120    120       120     120      120      119
014        120      120  120      120    120       120     120      120      119
015        120      120  120      120    120       120     120      120      119
016        120      120  120      120    120       120     120      120      119
017        120      120  120      120    120       120     120      120      119
018        120      120  120      120    120       120     120      120      119
019        120      120  120      120    120       120     120      120      119
020        120      120  120      120    120       120     120      120      119
021        120      120  120      120    120       120     120      120      119
022        120      120  120      120    120       120     120      120      119
023        120      120  120      120    120       120     120      120      119
024        120      120  120      120    120       120     120      120      119
025        120      120  120      120    120       120     120      120      119
026        120      120  120      120    120       120     120      120      119
027        120      120  120      120    120       120     120      120      119
028        120      120  120      120    120       120     120      120      119
029        120      120  120      120    120       120     120      120      119
030        120      120  120      120    120       120     120      120      119
032        120      120  120      120    120       120     120      120      119
033        120      120  120      120    120       120     120      120      119
034        120      120  120      120    120       120     120      120      119
035        120      120  120      120    120       120     120      120      119
036        120      120  120      120    120       120     120      120      119
037        120      120  120      120    120       120     120      120      119
038        120      120  120      120    120       120     120      120      119
039        120      120  120      120    120       120     120      120      119
040        120      120  120      120    120       120     120      120      119
041        120      120  120      120    120       120     120      120      119
...        ...      ...  ...      ...    ...       ...     ...      ...      ...
092        120      120  120      120    120       120     120      120      119
093        120      120  120      120    120       120     120      120      119
094        120      120  120      120    120       120     120      120      119
095        120      120  120      120    120       120     120      120      119
096        120      120  120      120    120       120     120      120      119
097        120      120  120      120    120       120     120      120      119
098        120      120  120      120    120       120     120      120      119
099        120      120  120      120    120       120     120      120      119
100        120      120  120      120    120       120     120      120      119
101        120      120  120      120    120       120     120      120      119
102        120      120  120      120    120       120     120      120      119
103        120      120  120      120    120       120     120      120      119
104        120      120  120      120    120       120     120      120      119
105        120      120  120      120    120       120     120      120      119
106        120      120  120      120    120       120     120      120      119
107        120      120  120      120    120       120     120      120      119
108        120      120  120      120    120       120     120      120      119
109        120      120  120      120    120       120     120      120      119
110        120      120  120      120    120       120     120      120      119
111        120      120  120      120    120       120     120      120      119
112        120      120  120      120    120       120     120      120      119
113        120      120  120      120    120       120     120      120      119
114        120      120  120      120    120       120     120      120      119
115        120      120  120      120    120       120     120      120      119
116        120      120  120      120    120       120     120      120      119
117        120      120  120      120    120       120     120      120      119
118        120      120  120      120    120       120     120      120      119
119        120      120  120      120    120       120     120      120      119
120        120      120  120      120    120       120     120      120      119
121        120      120  120      120    120       120     120      120      119

110 rows × 9 columns
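
The per-task loopback counts commented out in In [2] could automate this spot check across all tasks; a hedged sketch (assumes EXPECTED_LOOPBACK_COUNTS is uncommented exactly as defined above):

loopback_counts = (dbase.xs('loopback', level='stamp_type')
                        .groupby(level=['task_name', 'pid']).size())
for task, expected in EXPECTED_LOOPBACK_COUNTS.items():
    task_counts = loopback_counts.loc[task]
    short = task_counts[task_counts != expected]
    if len(short):
        print(task, '-- pids with unexpected loopback counts:')
        print(short)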

Below here is exploratory - nothing below is used in later steps. (We proceed from the pickle file in Pt. 2.)

Preliminary scatterplot, isip500 / 800


In [173]:
data = dfo #.drop(['048', '049', '055'])
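# NB: dfo is not defined in this notebook's earlier cells -- it appears to be
# a per-participant summary frame (indexed by pid) built in later analysis.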

plt.figure(figsize=(8,5))
plt.scatter(data['isip8_sq2dev_mean_sqrt'], data['isip5_sq2dev_mean_sqrt'])
plt.show()



In [174]:
plt.figure(figsize=(8,5))
plt.scatter(dfo['isip5_ints_pstd'], dfo['isip8_ints_pstd'])
plt.show()


Testing / visualizing - not needed in final 'product'

Plotting interval results after filtering steps


In [175]:
def scatter_tooltips(df, x_col, y_col, 
                 size_col=None, 
                 color_col=None, 
                 show_all_cols=False,
                 fig_size=(8, 5)):
    import pandas as pd
    import mpld3
    from mpld3 import plugins

    # Define some CSS to control our custom labels
    css = """
    table { border-collapse: collapse; }
    th { color: #ffffff; background-color: #000000; }
    td { background-color: #cccccc; }
    table, th, td { font-family:Arial, Helvetica, sans-serif;
                    border: 1px solid black; text-align: right; }
    """

    fig, ax = plt.subplots()
    fig.set_size_inches(fig_size)
    ax.grid(True, alpha=0.3)

    labels = []
    for row in df.iterrows():
        index, series = row
        pid = index
        label = pd.DataFrame(series)
        labels.append(str(label.to_html()))

    points = ax.plot(df[x_col], 
                     df[y_col], 
                     'o', 
                     color='b',
                     markeredgecolor='k', 
                     markersize=8, 
                     markeredgewidth=1, 
                     alpha=.6)
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(x_col + ' vs. ' + y_col, size=16)
    tooltip = plugins.PointHTMLTooltip(points[0], labels,
                                       voffset=10, hoffset=10, css=css)
    plugins.connect(fig, tooltip)

    return mpld3.display()

In [176]:
scatter_tooltips(data, 'isip8_sq2dev_mean_sqrt', 'isip5_sq2dev_mean_sqrt',
                 fig_size=(12, 7.5))


Out[176]:

In [132]:
def d3plot(x, y, size=(10,6)):
    import mpld3

    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    fig.set_size_inches(size)  # was hardcoded (8,5), ignoring the size argument
    scatter = ax.scatter(x, y,
                         s=40, #marker size
                         alpha=0.5,
                         cmap=plt.cm.jet)
    ax.grid(color='white', linestyle='solid')
    ax.set_title("Scatter Plot (with tooltips!)", size=10)
    labels = ['{0}'.format(pid) for pid in x.index]
    tooltip = mpld3.plugins.PointLabelTooltip(scatter, labels=labels)
    mpld3.plugins.connect(fig, tooltip)
    return mpld3.display()

In [133]:
d3plot(data['isip5_sq2devsum'], data['isip8_sq2devsum'])


Out[133]:

intervals after filtering


In [134]:
for pid in pid_list[40:80]:
    try:
        sset = db_isip8.xs([pid, 'tap_r'], level=['pid', 'stamp_type'])
        data = sset.ints
        if data.min() > 700 and data.max() < 900: continue        
        print(pid)
        plt.figure(figsize=(13,6))
        data.hist(bins=100)
        #annotating non-midpoint values
        caption_y_increment = 0.2
        median = data.median()
        prev_ypos = 0
        for idx, value in enumerate(data):
            if np.abs(value - median) > 150:
                caption = str(data.index[idx]) + ": " + str(value.round(1))
                plt.annotate(caption, (value, prev_ypos + caption_y_increment))
                prev_ypos += caption_y_increment
        plt.show()
    except Exception:
        print("error....")


#p056: high outlier at 9138: 924.0 ms  ***
#045, 046: error or no data

#need lower bound of >632 for participant 048 (extra long intervals from
#explicit midpoint counting) -- see the exclusion sketch below the output.

#p. 064: low outlier at about 630-640ms


050
051
053
055
056
058
060
062
064
065
067
068
069
071
072
073
077
079
080
084
086
087
089
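
As flagged in the comments above, participant 048 would need a lower interval bound above 632 ms; a sketch of such a per-participant exclusion (hypothetical; uses db_isip8 and its ints column as above):

pid_vals = db_isip8.index.get_level_values('pid')
mask_048 = (pid_vals == '048') & (db_isip8.ints <= 632).values
db_isip8 = db_isip8[~mask_048]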

In [26]:
sset = db_isip8.xs(['056', 'tap_r'], level=['pid', 'stamp_type'])
print(sset.ints.mean())
print(sset.ints.std())


728.982048
37.2019991333

In [18]:
for pid in pid_list:
    try:
        sset = db_isip5.xs([pid, 'tap_r'], level=['pid', 'stamp_type'])
        data = sset.ints
        if data.min() > 400 or data.max() < 600: continue        
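        # NB: unlike the `and` in the ISIP-8 cell above, this `or` shows only
        # participants with both a low (<=400) and a high (>=600) interval.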
        print(pid)
        plt.figure(figsize=(13,6))
        data.hist(bins=100)
        #annotating non-midpoint values
        caption_y_increment = 0.2
        median = data.median()
        prev_ypos = 0
        for idx, value in enumerate(data):
            if np.abs(value - median) > 150:
                caption = str(data.index[idx]) + ": " + str(value.round(1))
                plt.annotate(caption, (value, prev_ypos + caption_y_increment))
                prev_ypos += caption_y_increment
        plt.show()
    except Exception:
        print("error....")

#High outliers:
#p049 - will need to remove manually


049
055

In [99]:
taps = db_isip5.xs('tap_r', level='stamp_type').ints
pmeans = taps.groupby(level='pid').mean()

data = pmeans

##############################

plt.figure(figsize=(13,6))
data.hist(bins=25)

#annotating non-midpoint values
caption_y_increment = 1
median = data.median()
prev_ypos = 0
for idx, value in enumerate(data):
    if np.abs(value - median) > 25:
        caption = str(data.index[idx]) + ": " + str(value.round(1))
        plt.annotate(caption, (value, prev_ypos + caption_y_increment))
        prev_ypos += caption_y_increment
plt.show()

#why is pid 049's mean so low?



In [98]:
taps = db_isip8.xs('tap_r', level='stamp_type').ints
pmeans = taps.groupby(level='pid').mean()

data = pmeans

##############################

plt.figure(figsize=(13,6))
data.hist(bins=25)

#annotating non-midpoint values
caption_y_increment = 0.8
median = data.median()
prev_ypos = 0
for idx, value in enumerate(data):
    if np.abs(value - median) > 60:
        caption = str(data.index[idx]) + ": " + str(value.round(1))
        plt.annotate(caption, (value, prev_ypos + caption_y_increment))
        prev_ypos += caption_y_increment
plt.show()

#Good distribution-- 048 is a bit high